Chaudhari M., Crowley J. and Hoering A. A Comparative Study on the SWOG Two-Stage Design Extension to Stop Early for Efficacy in Single Arm Phase II Trials. Statistics in Biopharmaceutical Research, DOI: 10.1080/19466315.2020.1865194, 2021.
Green SJ., Benedetti J. and Crowley J. Clinical Trials in Oncology. Chapman and Hall, 1997.
Green SJ. and Dahlberg S. Planned Versus Attained Design in Phase II Clinical Trials. Statistics in Medicine 11:853-862, 1992.
The Two Stage calculator computes estimates of sample size for a two-stage study. The calculator then computes critical values for rejection of \(H_{0}\) or of \(H_{a}\) at the end of each of the stages of the trial- R1 and A1 for the first stage, and R2 and A2 for the second stage. The calculator then computes the exact alpha and power for the trial, and exact power for an array of values of \(p_{a}\) compared to a fixed \(p_{0}\).
The design framework allows the user to determine whether the design will allow stopping at the end of the first stage for futility only, or for futility and efficacy. Critical values and design probability calculations will be presented meeting the framework of the option selected.
There are three calculation steps. The first step determines the sample size in each stage for the design. The second step determines SWOG critical values for the design (which can be modified by the user). The final step calculates design probabilities for the given study.
The user may customize the outputs at the end of the first and/or second calculation step, and recalculate subsequent values accordingly.
The calculator is based on the approach of Green and Dahlberg (1992), and is designed to produce a design with overall \(\alpha\) \(\approx\) 0.05. This design approach features maximum allowable error thresholds of 0.02 and 0.055, respectively, for the first and second stages of the design. In practice, there are cases where a two-stage design cannot be found under these constraints, but would be possible if a threshold for the first stage of greater than 0.02 were used; in these cases, the calculator will produce a warning message that a design with the recommended stage 1 error threshold of 0.02 is impossible, and will instead return a design with an updated stage 1 error threshold.
The user is prompted for values to the following items. For items that have initial default values set, the values are given in parentheses.
Sample size values can be modified after the initial calculation is completed. Choose “Re-Calculate with Updated Sample Size” to get new critical values and exact power calculations after updating \(N_{1}\) and/or \(N_{2}\).
Critical values can be modified after the initial calculation is completed. Choose “Re-Calculate Probabilities” after making any desired adjustments to any of the critical values \(A_{1}\), \(A_{2}\), \(R_{1}\), or \(R_{2}\).
\(A_{1}\): If the number of successes after completing the first stage is \(\le\) \(A_{1}\), we reject the alternative hypothesis that p \(\ge\) \(p_{a}\), and end the trial at the first stage without continued enrollment into the second stage.
\(R_{1}\): If the number of successes after completing the first stage is \(\ge\) \(R_{1}\), we reject the null hypothesis that p \(\le\) \(p_{0}\), and end the trial at the first stage without continued enrollment into the second stage. Note that for designs of futility only, this will be set by default to N1 + 1, as conclusions for efficacy at the end of Stage 1 would only be allowed within the framework of a futility + efficacy design.
\(A_{2}\): If the number of successes after completing the second stage is \(\le\) \(A_{2}\), we reject the alternative hypothesis that p \(\ge\) \(p_{a}\).
\(R_{2}\): If the number of successes after completing the second stage is \(\ge\) \(R_{2}\), we reject the null hypothesis that p \(\le\) \(p_{0}\).
The program is written in R.
View Code
function(design = 1, p_H0, p_Ha, alpha, input_power, n1n2 = NA, a1 = NA, a2 = NA, r1 = NA, thrsh_err1 = 0.02) {
# Function to calculate the exact alpha, power, and probabilities of early termination
calcprobs <- function(p0, pa, alpha, tn1, tn2, a1, a2, r1) {
vPower <- vPa <- vEarlyH0Pa <- vEarlyHaPa <- c()
vPa <- p0 + 0.05 * c(0:7)
## vPa[1] = p0
if (!round(pa, 2) %in% round(vPa, 2)) {
vPa <- c(vPa[1], sort(c(pa, p0 + 0.05 * c(1:7))))
}
vPa[vPa > 1] <- 1
vPower <- rep(0, length(vPa))
EarlyH0P0 <- stats::pbinom(a1, tn1, p0)
temp <- tn1 + 1
if (r1 > temp) {
EarlyHaP0 <- 0
}
else{
EarlyHaP0 <- stats::pbinom(r1 - 1, tn1, p0, lower.tail = FALSE)
}
if (tn1 == 0) {
tn1 <- tn2
tn2 <- 0
}
if (tn2 == 0) {
AlphaProb <- stats::pbinom(a1, tn1, p0, lower.tail = FALSE)
EarlyH0P0 <- 0
EarlyHaP0 <- 0
for (j in 2:length(vPa)) {
vEarlyH0Pa[j] <- 0
vEarlyHaPa[j] <- 0
vPower[j] <- stats::pbinom(r1 - 1, tn1, vPa[j], lower.tail = FALSE)
}
}
else{
p1 <- 0
p2p1 <- 0
a1p2 <- a1 + 2
mn <- min(tn1, a2, r1 - 1)
for (j in 1:length(vPa)) {
## j = 1 corresponds to H0: p = p0
p1 <- stats::pbinom(a1, tn1, vPa[j]) # P(X<=a1|n1,pa) = Type 2 error
if (r1 > (a1 + 1)) {
tP1 <- stats::pbinom(a2 - a1 - 1, tn2, vPa[j]) #P(X2<=a2-(a1+1)|n2,pa)
p2p1 <- tP1 * stats::dbinom(a1 + 1, tn1, vPa[j]) # p2p1 <- tP1 * (tP2 - tP3);
}
if (a1p2 <= mn) {
for (i in a1p2:mn) {
tP1 <- stats::pbinom(a2 - i, tn2, vPa[j])
p2p1 <- p2p1 + (tP1 * stats::dbinom(i, tn1, vPa[j]))
# beta = P(accept H0|Ha)
}
}
### p2p1<-P(X1+X2 <= a2|X1 = a1+1,n2,Pa[1],r1>=a1+2) * P(X1 = a1+1|n1,Pa[1],r1>=a1+2) + P(X1+X2 <= a2|X1 = a1+2,n2,Pa[1]) * P(X1 = a1+2|n1,Pa[1]) + P(X1+X2 <= a2|X1 = a1+3,n2,Pa[1]) * P(X1 = a1+3|n1,Pa[1]) +...+ P(X1+X2 <= a2|X1 = a2,n2,Pa[1]) * P(X1 = a2|n1,Pa[1])
vPower[j] <- 1 - (p1 + p2p1)
# 1 - (Stage 1 Type2 Error + Stage 2 Type2 Error); no dependency on r1, r2
vEarlyH0Pa[j] <- stats::pbinom(a1, tn1, vPa[j])
if (r1 > (tn1 + 1)) {
vEarlyHaPa[j] <- 0
}
else{
vEarlyHaPa[j] <- stats::pbinom(r1 - 1, tn1, vPa[j], lower.tail = FALSE) # P(X1 > r1-1|n1,pa) = P(X1 >= r1|n1,pa)
}
}
AlphaProb <- vPower[1] #1 - (p1 + p2p1);
}
return(list("AlphaProb" = round(AlphaProb * 1000) / 1000, "EarlyH0P0" = round(EarlyH0P0 * 1000) / 1000, "EarlyHaP0" = round(EarlyHaP0 * 1000) / 1000, "vPower" = round(vPower[2:length(vPa)] * 1000) / 1000, "vPa" = round(vPa[2:length(vPa)] * 1000) / 1000, "vEarlyH0Pa" = round(vEarlyH0Pa[2:length(vPa)] * 1000) / 1000, "vEarlyHaPa" = round(vEarlyHaPa[2:length(vPa)] * 1000) / 1000))
}
thrsh_err2 = alpha + 0.005
beta = 1 - input_power
GD.plnd.smry <- GD.atnd.smry <- GD.atnd.smry1 <- GD.atnd.smry2 <- GD.issue <- GD.recalc_values <- GD.probs <- c()
for (l in 1:length(alpha)) {
for (k in 1:length(p_H0)) {
if (all(is.na(n1n2))) {
# Calculate sample size for each stage of a two-stage study design
#------------------------------------------------------------------
# Binomial sample size assuming single staged design
za = abs(stats::qnorm(alpha[l]))
zb = abs(stats::qnorm(beta[l]))
y1 = atan(sqrt(p_H0[k] / (1 - p_H0[k])))
y2 = atan(sqrt(p_Ha[k] / (1 - p_Ha[k])))
n = ((zb * 0.5001) + (za * 0.5001)) / (y2 - y1)
n = floor(n * n) + 1.
# First stage sample size n1 for a two-stage design
x = floor(n / 5)
if (n - x * 5 > 0) {
x <- x + 1
}
n0 <- 5 * x
y <- floor(n0 / 10)
if (n0 - (floor(n0 / 10) * 10) == 0) {
tn1 = n0 / 2
}
else{
tn1 = (n0 + 5) / 2
}
# Second stage sample size n2 for a two-stage design
x <- floor(n / 5)
if (n - x * 5 > 0) {
x <- x + 1
}
n0 <- 5 * x
tn2 = n0 - tn1
# Set the sample sizes
GD.n1n2 = c(tn1, tn2)
} else{
GD.n1n2 <- n1n2
}
GD.AttainedN <- cbind("N" = sum(GD.n1n2), "n1" = GD.n1n2[1], "n2" = GD.n1n2[2], "M" = sum(GD.n1n2))
GD.AttainedN <- kronecker(GD.AttainedN, 1)
GD.AttainedN <- cbind(GD.AttainedN, "m1" = GD.AttainedN[, 2])
GD.AttainedN <- as.data.frame(cbind(GD.AttainedN, "m2" = GD.AttainedN[, 4] - GD.AttainedN[, 5]))
colnames(GD.AttainedN) <- c("N", "n1", "n2", "M", "m1", "m2")
invalid.cnt <- nrow(GD.AttainedN[!(GD.AttainedN$m1 > 0 & GD.AttainedN$m2 > 0), ])
if (invalid.cnt >= 1) {
GD.issue = c(GD.issue, "The attained design includes erroneous range for either N or n1. Please specify correctly!")
GD.AttainedN <- GD.AttainedN[GD.AttainedN$m1 > 0 & GD.AttainedN$m2 > 0, ]
}
GD.est <- c()
GD.optim <- NA
if (nrow(GD.AttainedN) == 0) {
y2calc = y1 + (((zb * 0.5001) + (za * 0.5001)) / sqrt(5))
p_HA_min=floor(100*((tan(y2calc))^2/(1+(tan(y2calc))^2)))/100
return(list(result = GD.plnd.smry,
result_table = GD.probs,
result_issue = paste0("Your study is not possible as specified (p0 = ", p_H0[k], ", pa = ", p_Ha[k], ", alpha = ", alpha[l], "). ",
"A suitable two-stage design exists for a design with pa = ", p_HA_min, " keeping the other parameters the same.")))
} else {
for (j in 1:nrow(GD.AttainedN)) {
# Calculate critical values (Vector of sample size and error probability thresholds for stage 1 and stage 2 to establish early futility)
issue_message = c()
issue_ta1 <- 0
Test <- 0
ta1 <- -1
while (TRUE) {
if ((Test > thrsh_err1) | (ta1 >= (GD.AttainedN$m1[j] - 1)))
break
ta1 <- ta1 + 1
bp <- stats::pbinom(ta1, GD.AttainedN$m1[j], p_Ha[k])
if (bp == -1) {
return(FALSE)
} else{
Test <- bp
}
}
ta1 <- ta1 - 1
if (ta1 == -1) {
issue_message = c(issue_message, paste0("ta1 = ", ta1))
ta1 <- 0
issue_ta1 <- 1
}
p_rule1 <- Test
if(Test <= thrsh_err1){
issue_message = c(issue_message, paste0("Test = ", Test, " < ", thrsh_err1))
}
tr1 <- GD.AttainedN$m1[j] + 1
tr2 <- ta1 + 1
Test <- 1.
while (TRUE) {
if ((Test < thrsh_err2) | (tr2 >= (GD.AttainedN$m1[j] + GD.AttainedN$m2[j] - 1)))
break
tr2 <- tr2 + 1
bp <- stats::pbinom(tr2 - 1, GD.AttainedN$m1[j] + GD.AttainedN$m2[j], p_H0[k])
if (bp == -1) {
return(FALSE)
} else{
Test <- 1 - bp
}
}
if(tr2 == (ta1 + 2)){
issue_message = c(issue_message, paste0("tr2 = ", tr2, " == ta1 = ", ta1, " + 2"))
}
p_rule2 <- Test
if(Test >= thrsh_err2){
issue_message = c(issue_message, paste0("Test = ", Test, " >= ", thrsh_err2))
}
ta2 <- tr2 - 1
tr1 <- GD.AttainedN$m1[j] + 1
if (length(issue_message) > 0) {
if (issue_ta1==1) {
stage1_message <- ceiling(10000*stats::pbinom(0,GD.AttainedN$m1[j], p_Ha[k]))/10000
issue_message = paste(issue_message, collapse = " / ")
GD.issue = c(GD.issue, paste0("Your study is not possible as specified. ",
"For a design with these test parameters, please enter a Stage 1 error threshold of at least ",
stage1_message, " to produce a valid design."))
GD.recalc_values <- data.frame(thrsh_err1 = stage1_message)
}
else{
issue_message = paste(issue_message, collapse = " / ")
GD.issue = c(GD.issue, paste0("Your study is not possible as specified. ",
"You must change the parameters to get the correct level."))
}
}
GD.bdry.p = c(ta1, ta2, tr1, tr2, p_rule1, p_rule2)
GDrules.p <- GD.bdry.p[5:6]
if (is.na(a1)) {
GD.a1 = GD.bdry.p[1]
} else {
GD.a1 = a1
}
GD.a1 = round(GD.a1, 0)
if (is.na(a2)) {
GD.a2 = GD.bdry.p[2]
} else {
GD.a2 = a2
}
GD.a2 = round(GD.a2, 0)
if (is.na(r1)) {
GD.r1 = GD.bdry.p[3]
} else {
GD.r1 = r1
}
GD.r1 = round(GD.r1, 0)
GD.r2 = GD.bdry.p[4]
GD.r2 = round(GD.r2, 0)
GD.a1a2r1r2 = round(c(GD.a1, GD.a2, GD.r1, GD.r2), 0)
if (design == 1) {
GD.prob <- calcprobs(p_H0[k], p_Ha[k], alpha[l], GD.AttainedN$m1[j], GD.AttainedN$m2[j], GD.a1, GD.a2, GD.r1)
GD.probs = rbind(GD.probs, as.data.frame(GD.prob))
} else {
probs <- list()
iteration <- 1
probs[[iteration]] <- calcprobs(p_H0[k], p_Ha[k], alpha[l], GD.AttainedN$m1[j], GD.AttainedN$m2[j], GD.a1, GD.a2, GD.r1)
if (probs[[iteration]]$AlphaProb < 0) {
GD.issue = c(GD.issue, paste0("Negative Alpha (p0 = ", p_H0[k], ", pa = ", p_Ha[k], ", alpha = ", alpha[l], ", n1 = ", GD.AttainedN$m1[j], ", n2 = ", GD.AttainedN$m2[j], ", a1 = ", GD.a1, ", a2 = ", GD.a2, ", r1 = ", GD.r1, ")"))
next
}
while (probs[[iteration]]$AlphaProb > 0 & probs[[iteration]]$AlphaProb <= 0.055) {
iteration <- iteration + 1
GD.r1 <- GD.r1 - 1
probs[[iteration]] <- calcprobs(p_H0[k], p_Ha[k], alpha[l], GD.AttainedN$m1[j], GD.AttainedN$m2[j], GD.a1, GD.a2, GD.r1)
}
if (iteration > 1) {
GD.optim = list(GD.r1 + 1, probs[[iteration - 1]])
} else{
GD.optim = paste0("p0 = ", p_H0[k], ", pa = ", p_Ha[k], ", n1 = ", GD.AttainedN$m1[j], ", n2 = ", GD.AttainedN$m2[j], ", a1 = ", GD.a1, ", a2 = ", GD.a2, ", r1 = ", GD.r1, " does not meet optimization criteria: alpha = ", probs[[iteration]]$AlphaProb)
}
if (is.list(GD.optim)) {
GD.a1a2r1r2[3] <- GD.optim[[1]]
GD.prob <- GD.optim[[2]]
GD.probs = rbind(GD.probs, as.data.frame(GD.prob))
} else{
GD.issue = c(GD.issue, GD.optim)
GD.est <- rbind(GD.est, c(alpha[l], 1 - beta[l], p_H0[k], p_Ha[k], unlist(GD.AttainedN[j, ]), GD.a1a2r1r2, rep(NA, 10), GDrules.p))
next
}
}
alfa <- GD.prob$AlphaProb
PET.H0 <- GD.prob$EarlyH0P0 + GD.prob$EarlyHaP0
ass.H0 <- PET.H0 * GD.AttainedN$m1[j] + (1 - PET.H0) * GD.AttainedN$M[j]
tmp <- cbind(GD.prob[[4]], GD.prob[[5]], GD.prob[[6]], GD.prob[[7]])
pwr <- tmp[tmp[, 2] == p_Ha[k], 1]
EarlyH0Pa <- tmp[tmp[, 2] == p_Ha[k], 3]
EarlyHaPa <- tmp[tmp[, 2] == p_Ha[k], 4]
PET.Ha <- (EarlyH0Pa + EarlyHaPa)
ass.Ha <- PET.Ha * GD.n1n2[1] + (1 - PET.Ha) * sum(GD.n1n2)
GD.est <- rbind(GD.est, c(alpha[l], 1 - beta[l], p_H0[k], p_Ha[k], unlist(GD.AttainedN[j, ]), GD.a1a2r1r2, ass.H0, ass.Ha, alfa, pwr,
GD.prob$EarlyH0P0, GD.prob$EarlyHaP0, EarlyH0Pa, EarlyHaPa, PET.H0, PET.Ha, GDrules.p))
colnames(GD.est) <- c("alpha", "input_power", "p_H0", "p_Ha", "N", "n1", "n2", "M", "m1", "m2", "a1", "a2", "r1", "r2", "ASS_p_H0", "ASS_p_Ha",
"Exact_alpha", "power", "EarlyH0P0", "EarlyHaP0", "EarlyH0Pa", "EarlyHaPa", "PET_H0", "PET_Ha", "p(0_02 rule)", "p(0_055 rule)")
}
}
GD.plnd.smry <- rbind(GD.plnd.smry, GD.est[, -c(8:10)])
}
}
GD.plnd.smry = as.data.frame(GD.plnd.smry)
GD.plnd.smry$thrsh_err1 <- thrsh_err1
GD.plnd.smry$thrsh_err2 <- thrsh_err2
return(list(result = GD.plnd.smry,
result_table = GD.probs,
result_issue = list(issue_message = GD.issue,
recalc_values = GD.recalc_values)))
}